### Load in price data ----
wti = fread(data.dir%&%'/Energy Prices/DCOILWTICO.csv')
brent = fread(data.dir%&%'/Energy Prices/POILBREUSDM.csv')
cpi = fread(data.dir%&%'/Energy Prices/CPIAUCSL.csv')
hh = fread(data.dir%&%'/Energy Prices/PNGASUSUSDM.csv')

prices = merge(wti, brent, by='DATE', all.x=TRUE)
prices = merge(prices, hh, by='DATE', all.x=TRUE)
prices = merge(prices, cpi, by='DATE', all.x=TRUE)
setnames(prices, c('DATE','WTI','BRENT','HH','CPI'))
setnames(prices, tolower(names(prices)))
prices[, date:=as.Date(date)]
prices[, hh:=as.numeric(hh)]
sapply(prices, class)
cpi.base = prices[date=='2020-01-01', cpi]
prices[, wti := wti*cpi.base/cpi]
prices[, brent := brent*cpi.base/cpi]
prices[, hh := hh*cpi.base/cpi]
prices[, d.ln.wti :=c(NA, diff(log(wti)))]
prices[, d.ln.hh :=c(NA, diff(log(hh)))]
prices = prices[complete.cases(hh)]

#### Examine spuds vs prices. ----
# dt[, spud_month := spud_date - day(spud_date) + 1]
spudcounts = dt[, .N, by=.(prod_type, fed.land, offshore, spud_month)][order(fed.land, prod_type, spud_month)]

spudcounts[offshore==1 & fed.land==0][year(spud_month)==2019]
# missing some obs for offshore, when no wells were drilled. fill these in.

spud.idx = CJ(spud_month=spudcounts[, unique(spud_month)],
              fed.land=spudcounts[, unique(fed.land)],
              offshore=spudcounts[, unique(offshore)],
              prod_type=spudcounts[, unique(prod_type)])

spudcounts = merge(spudcounts, spud.idx, by=c('prod_type','fed.land','offshore','spud_month'), all.y=TRUE)
spudcounts[is.na(N), N:=0] # if not in original spudcount set, it means it was zero.

spudcounts[offshore==1 & fed.land==0][year(spud_month)==2019]

# Fill in lagged differences
spudcounts[, d.ln.N := c(NA, diff(log(N+1))), by=.(prod_type, fed.land, offshore)]

spudcounts = spudcounts[!is.na(spud_month)]
spudcounts = spudcounts[spud_month>='1990-01-01']
spudcounts = spudcounts[spud_month<'2019-03-01']

# Output nice plot of O/G & F/NF onshore wells, against O&G prices
my.cols = alpha(col_lookup[4:1], 1)
names(my.cols) = names(col_lookup[4:1])

# Create a time series of wells w/o any gas in the first 12 months
dt.nogas = dt[gas_cum==0]
dt[, mean(gas_cum==0), by=prod_type]
dt[, mean(liq_cum==0), by=prod_type]
# dt.nogas = dt[liq_cum==0] # for "no oil"
spudcounts.nogas = dt.nogas[, .N, by=.(prod_type, fed.land, offshore, spud_month)][order(fed.land, prod_type, spud_month)]
spudcounts.nogas = spudcounts.nogas[!is.na(spud_month)]
spudcounts.nogas = spudcounts.nogas[spud_month %in% spudcounts[, unique(spud_month)]]
rm(dt.nogas)
spudcounts.nogas = merge(spudcounts.nogas, spud.idx, by=c('prod_type','fed.land','offshore','spud_month'), all.y=TRUE)
spudcounts.nogas[is.na(N), N:=0] # if not in original spudcount set, it means it was zero.
spudcounts.nogas[, d.ln.N := c(NA, diff(log(N+1))), by=.(prod_type, fed.land, offshore)]
spudcounts.nogas = spudcounts.nogas[spud_month>='1990-01-01']
spudcounts.nogas = spudcounts.nogas[spud_month<'2019-03-01']

# Calculate average as share, merge in
dt[, gas.share := (gas_cum*1.036)/(gas_cum*1.036 + liq_cum*5.8)]
dt[is.na(gas.share), .(gas_cum, liq_cum)]
dt[gas_cum*1.036==liq_cum*5.8, gas.share := 0.5] # only changes things for dry holes
dt[is.na(gas.share)]

gas.shares = dt[, .(gas.share=mean(gas.share, na.rm=T)), by=.(prod_type, fed.land, offshore, spud_month)]
dt[, gas.share.med := median(gas.share), by=.(prod_type, fed.land, offshore)]
dt[prod_type=='Gas' & fed.land==0 & offshore==0, .(gas.share.med)] # note: onshore nonfederal gas wells, more than 1/2 of wells 
dt[, high.gas.share := 1*(gas.share>=gas.share.med)]
dt[, mean(high.gas.share), by=.(prod_type, fed.land, offshore)]
spudcounts.highgas = dt[high.gas.share==1, .(N.highgas=.N), by=.(prod_type, fed.land, offshore, spud_month)]
spudcounts.highgas = merge(spudcounts.highgas, spud.idx, by=c('prod_type','fed.land','offshore','spud_month'), all.y=TRUE)
spudcounts.highgas[is.na(N.highgas), N.highgas:=0] # if not in original spudcount set, it means it was zero.
spudcounts.highgas[, d.ln.N.highgas := c(NA, diff(log(N.highgas+1))), by=.(prod_type, fed.land, offshore)]

spudcounts.lowgas = dt[high.gas.share==0, .(N.lowgas=.N), by=.(prod_type, fed.land, offshore, spud_month)]
spudcounts.lowgas = merge(spudcounts.lowgas, spud.idx, by=c('prod_type','fed.land','offshore','spud_month'), all.y=TRUE)
spudcounts.lowgas[is.na(N.lowgas), N.lowgas:=0] # if not in original spudcount set, it means it was zero.
spudcounts.lowgas[, d.ln.N.lowgas := c(NA, diff(log(N.lowgas+1))), by=.(prod_type, fed.land, offshore)]

spudcounts = merge(spudcounts, gas.shares, by=c('prod_type', 'fed.land', 'offshore', 'spud_month'), all.x=TRUE)
spudcounts = merge(spudcounts, spudcounts.highgas, by=c('prod_type', 'fed.land', 'offshore', 'spud_month'), all.x=TRUE)
spudcounts = merge(spudcounts, spudcounts.lowgas, by=c('prod_type', 'fed.land', 'offshore', 'spud_month'), all.x=TRUE)

spudcounts[, moy := month(spud_month)]

spudcounts = merge(spudcounts, prices, by.x='spud_month', by.y='date')

plot.months = as.Date(intersect(spudcounts[, unique(spud_month)], prices[, unique(date)]))
spudcounts.plot = spudcounts[spud_month %in% plot.months]
prices.plot = prices[date %in% plot.months]
for (o in 0:1) {
  pdf(output.dir%&%'/Spudcounts_'%&%ifelse(o==0, 'Onshore','Offshore')%&%'_'%&%today()%&%'.pdf', family='serif', width=11, height=8.5)
  par(mai=c(0.8, 0.9, 0.5, 1.1))
  plot(N ~ spud_month, data=spudcounts.plot[prod_type=='Oil' & fed.land==0 & offshore==o], type='l', lty=1, lwd=3, col=my.cols[1],
       xlab='Month', ylab='Number of Wells Drilled', cex.axis=1.5, cex.lab=1.5, main='', ylim=c(0, ifelse(o==0, 3000, 125)))
  grid(nx=NA, ny=NULL)
  abline(h=0)
  lines(N ~ spud_month, data=spudcounts.plot[prod_type=='Oil' & fed.land==1 & offshore==o], type='l', lty=1, lwd=3, col=my.cols[2])
  lines(N ~ spud_month, data=spudcounts.plot[prod_type=='Gas' & fed.land==0 & offshore==o], type='l', lty=1, lwd=3, col=my.cols[3])
  lines(N ~ spud_month, data=spudcounts.plot[prod_type=='Gas' & fed.land==1 & offshore==o], type='l', lty=1, lwd=3, col=my.cols[4])
  par(new=TRUE)
  plot(wti ~ date, data=prices.plot, type='l', lwd=3, col='black',
       xlab='', main='', xaxt='n', yaxt='n', ylim=c(0, 180), ylab='', lty=5)
  lines(hh*6 ~ date, data=prices.plot, type='l', lty=5, lwd=3, col='blue')
  axis(4, cex.axis=1.5)
  corners = par('usr')
  par(xpd=TRUE)
  text(x = corners[2]+1000, y = mean(corners[3:4]), 
       'Oil and Gas Price \n (2020$ per barrel of oil equivalent)', cex=1.5, 
       srt = 270)
  legend('topleft',legend=c('Oil Wells, Nonfederal', 'Oil Wells, Federal',
                            'Gas Wells, Nonfederal', 'Gas Wells, Federal'),
         col=my.cols, lwd=3, lty=1, bty='n', cex=1.5)
  legend('topright',legend=c('WTI Oil Price', 'Henry Hub Gas Price'), col=c('black','blue'),
         lty=5, lwd=3, bty='n', cex=1.5)
  dev.off()
}

rm(spudcounts.plot)
rm(prices.plot)
gc()
